bitkeeper revision 1.794.1.1 (405b85b44Vh_3MMuChrmhJ9H5nxbyw)
author iap10@labyrinth.cl.cam.ac.uk <iap10@labyrinth.cl.cam.ac.uk>
Fri, 19 Mar 2004 23:43:48 +0000 (23:43 +0000)
committer iap10@labyrinth.cl.cam.ac.uk <iap10@labyrinth.cl.cam.ac.uk>
Fri, 19 Mar 2004 23:43:48 +0000 (23:43 +0000)
basic shadow support
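
Introduce an optional shadow page table mode, enabled with CONFIG_SHADOW.
When a domain's mm has shadowmode set, Xen runs the domain on
hypervisor-maintained shadow copies of its page tables: CR3 is loaded
from mm.shadowtable instead of mm.pagetable, shadow L1s/L2s are built
lazily by the page fault handler (shadow_fault), and the normal page
table update paths propagate guest writes into any existing shadows.
A second recursive mapping is reserved at SH_LINEAR_PT_VIRT_START so
the active shadow tables can be edited through virtual addresses.
For now dom0 is switched unconditionally to the simplest mode:

    p->mm.shadowmode  = SHM_test;
    p->mm.shadowtable = shadow_mk_pagetable(phys_l2tab, p->mm.shadowmode);

The SHM_logdirty, SHM_cow and SHM_translate modes declared in shadow.h
are placeholders for later work.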

17 files changed:
.rootkeys
BitKeeper/etc/ignore
xen/arch/i386/process.c
xen/arch/i386/traps.c
xen/common/debug.c
xen/common/domain.c
xen/common/domain_page.c
xen/common/kernel.c
xen/common/memory.c
xen/common/shadow.c [new file with mode: 0644]
xen/include/asm-i386/config.h
xen/include/asm-i386/page.h
xen/include/asm-i386/processor.h
xen/include/xeno/mm.h
xen/include/xeno/perfc_defn.h
xen/include/xeno/shadow.h [new file with mode: 0644]
xen/net/dev.c

index e078d7cc52100ec9cf7e7d97fb99ff710f21a1f7..d28d693e5da9cbeac62967f9b7ed08ff7727ff64 100644 (file)
--- a/.rootkeys
+++ b/.rootkeys
 4006e659i9j-doVxY7DKOGU4XVin1Q xen/common/rbtree.c
 3ddb79bdHqdQpATqC0rmUZNbsb6L6A xen/common/resource.c
 3e397e6619PgAfBbw2XFbXkewvUWgw xen/common/schedule.c
+405b8599xI_PoEr3zZoJ2on-jdn7iw xen/common/shadow.c
 3ddb79bdB9RNMnkQnUyZ5C9hhMSQQw xen/common/slab.c
 3ddb79bd0gVQYmL2zvuJnldvD0AGxQ xen/common/softirq.c
 3e7f358awXBC3Vw-wFRwPw18qL1khg xen/common/string.c
 3e4540ccU1sgCx8seIMGlahmMfv7yQ xen/include/xeno/reboot.h
 3ddb79c0LzqqS0LhAQ50ekgj4oGl7Q xen/include/xeno/sched.h
 403a06a7H0hpHcKpAiDe5BPnaXWTlA xen/include/xeno/serial.h
+405b8599BsDsDwKEJLS0XipaiQW3TA xen/include/xeno/shadow.h
 3ddb79c0VDeD-Oft5eNfMneTU3D1dQ xen/include/xeno/skbuff.h
 3ddb79c14dXIhP7C2ahnoD08K90G_w xen/include/xeno/slab.h
 3ddb79c09xbS-xxfKxuV3JETIhBzmg xen/include/xeno/smp.h
index 2a2f79ebac8c71e80a95b86476d05b54ae29e027..7a0065247bd3043abcc18956502b4ce8c9787b05 100644 (file)
--- a/BitKeeper/etc/ignore
+++ b/BitKeeper/etc/ignore
@@ -548,3 +548,13 @@ tools/xentrace/xentrace
 tools/xc/lib/xc_evtchn.o
 tools/xc/py/XenoUtil.pyc
 tools/xend/xend
+tools/xc/lib/libxc.so.1.3
+tools/xc/lib/libxc.so.1.3.0
+tools/xc/lib/xc_physdev.o
+tools/xend/xend_utils.o
+xen/common/physdev.o
+xen/common/shadow.o
+xen/common/trace.o
+xen/drivers/char/console.o
+xen/drivers/char/keyboard.o
+xen/include/hypervisor-ifs/arch
index 09170307a7112bc8566889af9b53686182db0eef..8ed1cf2dc2e1870ddfbdc97db5fcec4ce0cdf0ed 100644 (file)
--- a/xen/arch/i386/process.c
+++ b/xen/arch/i386/process.c
@@ -32,6 +32,7 @@
 
 #include <xeno/irq.h>
 #include <xeno/event.h>
+#include <xeno/shadow.h>
 
 int hlt_counter;
 
@@ -281,7 +282,24 @@ void switch_to(struct task_struct *prev_p, struct task_struct *next_p)
     }
 
     /* Switch page tables.  */
-    write_cr3_counted(pagetable_val(next_p->mm.pagetable));
+#ifdef CONFIG_SHADOW
+    /* printk("switch_to %08lx, %08lx\n", next_p->mm.pagetable,
+       next_p->mm.shadowtable); */
+    if ( next_p->mm.shadowmode )
+    {
+        write_cr3_counted(pagetable_val(next_p->mm.shadowtable));
+        check_pagetable( next_p->mm.pagetable, "switch" );
+    }
+    else
+#endif
+        write_cr3_counted(pagetable_val(next_p->mm.pagetable));
 
     set_current(next_p);
 
index f71ce60d570317a1188f5a3c0e1cb15d5c4dc7da..717ca6d2cbd57262531784e7f488962af821a90d 100644 (file)
--- a/xen/arch/i386/traps.c
+++ b/xen/arch/i386/traps.c
@@ -39,6 +39,7 @@
 #include <xeno/spinlock.h>
 #include <xeno/irq.h>
 #include <xeno/perfc.h>
+#include <xeno/shadow.h>
 #include <asm/domain_page.h>
 #include <asm/system.h>
 #include <asm/io.h>
@@ -323,6 +324,8 @@ asmlinkage void do_page_fault(struct pt_regs *regs, long error_code)
 
     __asm__ __volatile__ ("movl %%cr2,%0" : "=r" (addr) : );
 
+    perfc_incrc(page_faults);
+
     if ( unlikely(addr >= LDT_VIRT_START) && 
          (addr < (LDT_VIRT_START + (p->mm.ldt_ents*LDT_ENTRY_SIZE))) )
     {
@@ -336,6 +339,18 @@ asmlinkage void do_page_fault(struct pt_regs *regs, long error_code)
             return; /* successfully copied the mapping */
     }
 
+#ifdef CONFIG_SHADOW
+    check_pagetable( current->mm.pagetable, "pre-sf" );
+    if ( p->mm.shadowmode && addr < PAGE_OFFSET &&
+         shadow_fault( addr, error_code ) )
+    {
+        check_pagetable( current->mm.pagetable, "post-sfa" );
+        return; /* fault was handled by the shadow code */
+    }
+    check_pagetable( current->mm.pagetable, "post-sfb" );
+#endif
+
     if ( unlikely(!(regs->xcs & 3)) )
         goto fault_in_hypervisor;
 
@@ -353,7 +368,8 @@ asmlinkage void do_page_fault(struct pt_regs *regs, long error_code)
 
     if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
     {
-        DPRINTK("Page fault: %08lx -> %08lx\n", regs->eip, fixup);
+        perfc_incrc(copy_user_faults);
+        //DPRINTK("copy_user fault: %08lx -> %08lx\n", regs->eip, fixup);
         regs->eip = fixup;
         regs->xds = regs->xes = regs->xfs = regs->xgs = __HYPERVISOR_DS;
         return;
index dff739d99a51a2328f12209e9c9a03966d7018fc..4e298bbfb5360ed0329ec9892bef221beff2e673 100644 (file)
--- a/xen/common/debug.c
+++ b/xen/common/debug.c
@@ -91,7 +91,13 @@ int pdb_change_values(domid_t domain, u_char *buffer, unsigned long addr,
 
     if ((addr >> PAGE_SHIFT) == ((addr + length - 1) >> PAGE_SHIFT))
     {
-        l2_table = map_domain_mem(pagetable_val(p->mm.pagetable));
+#ifdef CONFIG_SHADOW
+        if ( p->mm.shadowmode )
+            l2_table = map_domain_mem(pagetable_val(p->mm.shadowtable));
+        else
+#endif
+            l2_table = map_domain_mem(pagetable_val(p->mm.pagetable));
+
        l2_table += l2_table_offset(addr);
        if (!(l2_pgentry_val(*l2_table) & _PAGE_PRESENT)) 
        {
index 53cea062850c1d25b044abb3be26c11563adcf05..c63c9164e3ce5875f64ea7e9b351125161f7ef9b 100644 (file)
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -9,6 +9,7 @@
 #include <xeno/delay.h>
 #include <xeno/event.h>
 #include <xeno/time.h>
+#include <xeno/shadow.h>
 #include <hypervisor-ifs/dom0_ops.h>
 #include <asm/io.h>
 #include <asm/domain_page.h>
@@ -546,6 +547,10 @@ int final_setup_guestos(struct task_struct *p, dom0_builddomain_t *builddomain)
     get_page_and_type(&frame_table[phys_l2tab>>PAGE_SHIFT], p, 
                       PGT_l2_page_table);
 
+#ifdef CONFIG_SHADOW
+    p->mm.shadowtable = shadow_mk_pagetable(phys_l2tab, p->mm.shadowmode);
+#endif
+
     /* Set up the shared info structure. */
     update_dom_time(p->shared_info);
 
@@ -847,6 +852,15 @@ int setup_guestos(struct task_struct *p, dom0_createdomain_t *params,
 
     set_bit(PF_CONSTRUCTED, &p->flags);
 
+#ifdef CONFIG_SHADOW
+    printk("Engage shadow mode for dom 0\n");
+    p->mm.shadowmode = SHM_test; /* XXX IAP */
+    p->mm.shadowtable = shadow_mk_pagetable(phys_l2tab, p->mm.shadowmode);
+#endif
+
     new_thread(p, 
                (unsigned long)virt_load_address, 
                (unsigned long)virt_stack_address, 
index 5e5974562a6f9bdf0dcf408f8e863c0c6ac93ce3..723d7e33e3dc8d661b693c665d7f9686c35ebb42 100644 (file)
--- a/xen/common/domain_page.c
+++ b/xen/common/domain_page.c
@@ -45,6 +45,8 @@ void *map_domain_mem(unsigned long pa)
     unsigned long *cache = mapcache;
     unsigned long flags;
 
+    perfc_incrc(map_domain_mem_count);
+
     spin_lock_irqsave(&map_lock, flags);
 
     /* Has some other CPU caused a wrap? We must flush if so. */
index b963c6f5e994d948ebc84f7cfd2913a53632af87..1737c722045decb714bc0f3aaf8c76fd9eb165ac 100644 (file)
--- a/xen/common/kernel.c
+++ b/xen/common/kernel.c
@@ -104,6 +104,7 @@ void cmain(unsigned long magic, multiboot_info_t *mbi)
     module_t *mod;
     void *heap_start;
     int i;
+    unsigned long frametable_pages, max_mem;
 
     /* Parse the command-line options. */
     cmdline = (unsigned char *)(mbi->cmdline ? __va(mbi->cmdline) : NULL);
@@ -190,22 +191,36 @@ void cmain(unsigned long magic, multiboot_info_t *mbi)
         for ( ; ; ) ;
     }
 
-    /* The array of pfn_info structures must fit into the reserved area. */
-    if ( sizeof(struct pfn_info) > 24 )
+    frametable_pages = ((FRAMETABLE_VIRT_END - RDWR_MPT_VIRT_START) /
+                        sizeof(struct pfn_info));
+
+    if ( frametable_pages < (1<<(32-PAGE_SHIFT)) )
     {
-        printk("'struct pfn_info' too large to fit in Xen address space!\n");
-        for ( ; ; ) ;
+        printk("Not enough space to initialise frame table for a 4GB "
+               "machine (%luMB only)\n",
+               frametable_pages >> (20-PAGE_SHIFT));
     }
 
     set_current(&idle0_task);
 
-    max_page = (mbi->mem_upper+1024) >> (PAGE_SHIFT - 10);
+    max_mem = max_page = (mbi->mem_upper+1024) >> (PAGE_SHIFT - 10);
+
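+    /* Clamp usable memory to the number of pages the frame table can
+       describe (frametable_pages, computed above). */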
+    if ( max_page > frametable_pages )
+        max_page = frametable_pages;
+
     init_frametable(max_page);
-    printk("Initialised all memory on a %luMB machine\n",
-           max_page >> (20-PAGE_SHIFT));
+    printk("Initialised %luMB memory on a %luMB machine\n",
+           max_page >> (20-PAGE_SHIFT),
+           max_mem >> (20-PAGE_SHIFT));
 
     heap_start = memguard_init(&_end);
 
+    printk("Xen heap size is %luKB\n", 
+           (MAX_MONITOR_ADDRESS-__pa(heap_start))/1024 );
+
+    if ( ((MAX_MONITOR_ADDRESS-__pa(heap_start))/1024) <= 4096 )
+    {
+        printk("Xen heap size is too small to safely continue!\n");
+        for ( ; ; ) ;
+    }
+
     init_page_allocator(__pa(heap_start), MAX_MONITOR_ADDRESS);
  
     /* Initialise the slab allocator. */
index 32acc0ac110160ff4470fd2356c21510c1342431..e672f78fe09eae6cd7dd3274cfcf428106b5792e 100644 (file)
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
 #include <xeno/errno.h>
 #include <xeno/perfc.h>
 #include <xeno/interrupt.h>
+#include <xeno/shadow.h>
 #include <asm/page.h>
 #include <asm/flushtlb.h>
 #include <asm/io.h>
@@ -182,6 +183,7 @@ static struct {
     struct task_struct *subject_p;
 } percpu_info[NR_CPUS] __cacheline_aligned;
 
+
 /*
  * init_frametable:
  * Initialise per-frame memory information. This goes directly after
@@ -768,6 +770,13 @@ void free_page_type(struct pfn_info *page, unsigned int type)
     default:
         BUG();
     }
+
+#ifdef CONFIG_SHADOW
+    /* Assume we're in shadow mode if PSH_shadowed is set. */
+    if ( page->shadow_and_flags & PSH_shadowed )
+        unshadow_table( page - frame_table );
+#endif
+
 }
 
 
@@ -832,6 +841,10 @@ static int do_extended_command(unsigned long ptr, unsigned long val)
             put_page_and_type(&frame_table[pagetable_val(current->mm.pagetable)
                                           >> PAGE_SHIFT]);
             current->mm.pagetable = mk_pagetable(pfn << PAGE_SHIFT);
+#ifdef CONFIG_SHADOW
+            current->mm.shadowtable = 
+                shadow_mk_pagetable(pfn << PAGE_SHIFT, current->mm.shadowmode);
+#endif
             invalidate_shadow_ldt();
             percpu_info[cpu].deferred_ops |= DOP_FLUSH_TLB;
         }
@@ -917,6 +930,10 @@ int do_mmu_update(mmu_update_t *ureqs, int count)
     struct pfn_info *page;
     int rc = 0, okay = 1, i, cpu = smp_processor_id();
     unsigned int cmd;
+#ifdef CONFIG_SHADOW
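+    /* prev_spfn/prev_spl1e cache the most recently used shadow L1 so that
+       a batch of updates to a single L1 only maps it once. */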
+    unsigned long prev_spfn = 0;
+    l1_pgentry_t *prev_spl1e = 0;
+#endif
 
     perfc_incrc(calls_to_mmu_update); 
     perfc_addc(num_page_updates, count);
@@ -967,6 +984,13 @@ int do_mmu_update(mmu_update_t *ureqs, int count)
                 {
                     okay = mod_l1_entry((l1_pgentry_t *)va, 
                                         mk_l1_pgentry(req.val)); 
+
+#ifdef CONFIG_SHADOW
+                    if ( okay && (page->shadow_and_flags & PSH_shadowed) )
+                        shadow_l1_normal_pt_update( req.ptr, req.val, 
+                                                    &prev_spfn, &prev_spl1e );
+#endif
+
                     put_page_type(page);
                 }
                 break;
@@ -976,6 +1000,11 @@ int do_mmu_update(mmu_update_t *ureqs, int count)
                     okay = mod_l2_entry((l2_pgentry_t *)va, 
                                         mk_l2_pgentry(req.val),
                                         pfn); 
+#ifdef CONFIG_SHADOW
+                    if ( okay && (page->shadow_and_flags & PSH_shadowed) )
+                        shadow_l2_normal_pt_update( req.ptr, req.val );
+#endif
+
                     put_page_type(page);
                 }
                 break;
@@ -985,9 +1014,19 @@ int do_mmu_update(mmu_update_t *ureqs, int count)
                     *(unsigned long *)va = req.val;
                     okay = 1;
                     put_page_type(page);
+
+#ifdef CONFIG_SHADOW
+                    /* At present, we shouldn't be shadowing such pages. */
+                    if ( page->shadow_and_flags & PSH_shadowed )
+                        BUG();
+#endif
+
                 }
                 break;
             }
+
+            check_pagetable( current->mm.pagetable, "mmu" ); /* XXX */
             
             put_page(page);
 
@@ -1031,11 +1070,23 @@ int do_mmu_update(mmu_update_t *ureqs, int count)
     if ( prev_pfn != 0 )
         unmap_domain_mem((void *)va);
 
+#ifdef CONFIG_SHADOW
+    if ( prev_spl1e != 0 )
+        unmap_domain_mem((void *)prev_spl1e);
+#endif
+
     deferred_ops = percpu_info[cpu].deferred_ops;
     percpu_info[cpu].deferred_ops = 0;
 
     if ( deferred_ops & DOP_FLUSH_TLB )
-        write_cr3_counted(pagetable_val(current->mm.pagetable));
+    {
+#ifdef CONFIG_SHADOW
+        if ( unlikely(current->mm.shadowmode) )
+            write_cr3_counted(pagetable_val(current->mm.shadowtable));
+        else
+#endif
+            write_cr3_counted(pagetable_val(current->mm.pagetable));
+    }
 
     if ( deferred_ops & DOP_RELOAD_LDT )
         (void)map_ldt_shadow_page(0);
@@ -1059,19 +1110,62 @@ int do_update_va_mapping(unsigned long page_nr,
     unsigned int cpu = p->processor;
     unsigned long deferred_ops;
 
+    perfc_incrc(calls_to_update_va);
+
     if ( unlikely(page_nr >= (HYPERVISOR_VIRT_START >> PAGE_SHIFT)) )
         return -EINVAL;
 
+    // XXX when we make this support 4MB pages we should also
+    // deal with the case of updating L2s
+
     if ( unlikely(!mod_l1_entry(&linear_pg_table[page_nr], 
                                 mk_l1_pgentry(val))) )
         err = -EINVAL;
 
+#ifdef CONFIG_SHADOW
+
+    if ( unlikely(p->mm.shadowmode) )
+    {
+        unsigned long sval = 0;
+
+        /* XXX this only works for l1 entries, with no translation */
+
+        if ( (val & _PAGE_PRESENT) && (val & _PAGE_ACCESSED) )
+        {
+            sval = val;
+            if ( !(val & _PAGE_DIRTY) )
+                sval &= ~_PAGE_RW;
+        }
+
+        /* printk("update_va_map: page_nr=%08lx val=%08lx sval=%08lx\n",
+           page_nr, val, sval); */
+
+        if ( __put_user( sval, ((unsigned long *)
+                                (&shadow_linear_pg_table[page_nr])) ) )
+        {
+            /*
+             * Since L2's are guaranteed RW, failure indicates the page
+             * was not shadowed, so ignore.
+             */
+            /* MEM_LOG("update_va_map: couldn't write update\n"); */
+        }
+    }
+
+    check_pagetable( p->mm.pagetable, "va" );
+
+#endif
+
     deferred_ops = percpu_info[cpu].deferred_ops;
     percpu_info[cpu].deferred_ops = 0;
 
     if ( unlikely(deferred_ops & DOP_FLUSH_TLB) || 
          unlikely(flags & UVMF_FLUSH_TLB) )
-        write_cr3_counted(pagetable_val(p->mm.pagetable));
+    {
+#ifdef CONFIG_SHADOW
+        if ( unlikely(p->mm.shadowmode) )
+            write_cr3_counted(pagetable_val(p->mm.shadowtable));
+        else
+#endif
+            write_cr3_counted(pagetable_val(p->mm.pagetable));
+    }
     else if ( unlikely(flags & UVMF_INVLPG) )
         __flush_tlb_one(page_nr << PAGE_SHIFT);
 
diff --git a/xen/common/shadow.c b/xen/common/shadow.c
new file mode 100644 (file)
index 0000000..7756b7d
--- /dev/null
+++ b/xen/common/shadow.c
@@ -0,0 +1,618 @@
+/* -*-  Mode:C; c-basic-offset:4; tab-width:4 -*- */
+
+#include <xeno/config.h>
+#include <xeno/types.h>
+#include <xeno/mm.h>
+#include <xeno/shadow.h>
+#include <asm/domain_page.h>
+#include <asm/page.h>
+
+#ifdef CONFIG_SHADOW
+
+
+#if 1
+#define MEM_VLOG(_f, _a...)                             \
+  printk("DOM%llu: (file=shadow.c, line=%d) " _f "\n", \
+         current->domain , __LINE__ , ## _a )
+#else
+#define MEM_VLOG(_f, _a...) 
+#endif
+
+#if 0
+#define MEM_VVLOG(_f, _a...)                             \
+  printk("DOM%llu: (file=shadow.c, line=%d) " _f "\n", \
+         current->domain , __LINE__ , ## _a )
+#else
+#define MEM_VVLOG(_f, _a...) 
+#endif
+
+
+/********
+
+To use these shadow page tables, guests must not rely on the ACCESSED
+and DIRTY bits on L2 pte's being accurate -- they will typically all be set.
+
+I doubt this will break anything. (If guests want to use the va_update
+mechanism they've signed up for this anyhow...)
+
+********/
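+
+/********
+
+Propagation rule (see shadow_fault and the *_normal_pt_update paths):
+a guest pte is only copied into a shadow once it is PRESENT|ACCESSED,
+and the shadow copy is only left writeable once the guest pte is also
+DIRTY; the guest pte gets ACCESSED (and DIRTY, on a write) set at the
+same time.  This is what keeps the shadows in step while still letting
+Xen observe A/D transitions as faults.
+
+********/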
+
+
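+/*
+ * Return the pagetable to load into CR3 for a domain whose guest L2 is at
+ * gptbase: the existing shadow recorded in frame_table if there is one,
+ * otherwise a freshly built shadow L2.  With shadowmode clear this returns
+ * a zero pagetable, which callers must not load.
+ */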
+pagetable_t shadow_mk_pagetable( unsigned long gptbase, 
+                                       unsigned int shadowmode )
+{
+       unsigned long gpfn, spfn=0;
+
+       MEM_VVLOG("shadow_mk_pagetable( gptbase=%08lx, mode=%d )",
+                        gptbase, shadowmode );
+
+       if ( unlikely(shadowmode) ) 
+       {
+               gpfn =  gptbase >> PAGE_SHIFT;
+               
+               if ( likely(frame_table[gpfn].shadow_and_flags & PSH_shadowed) )
+               {
+                       spfn = frame_table[gpfn].shadow_and_flags & PSH_pfn_mask;
+               }
+               else
+               {
+                       spfn = shadow_l2_table( gpfn );
+               }      
+       }
+
+       return mk_pagetable(spfn << PAGE_SHIFT);
+}
+
+void unshadow_table( unsigned long gpfn )
+{
+       unsigned long spfn;
+
+MEM_VLOG("unshadow_table %08lx\n", gpfn );
+
+       perfc_incrc(unshadow_table_count);
+
+       // this function is the same for both l1 and l2 tables
+
+       // even in the SMP guest case, there won't be a race here as
+    // this CPU was the one that cmpxchg'ed the page to invalid
+
+       spfn = frame_table[gpfn].shadow_and_flags & PSH_pfn_mask;
+       frame_table[gpfn].shadow_and_flags=0;
+       frame_table[spfn].shadow_and_flags=0;
+
+#ifdef DEBUG
+       { // XXX delete me!
+               int i;
+               unsigned long * spl1e = map_domain_mem( spfn<<PAGE_SHIFT );
+
+               for ( i = 0; i < ENTRIES_PER_L1_PAGETABLE; i++ )
+               {
+                       spl1e[i] = 0xdead0000;
+               }
+               unmap_domain_mem( spl1e );
+       }
+#endif
+
+       free_domain_page( &frame_table[spfn] );
+}
+
+
+unsigned long shadow_l2_table( unsigned long gpfn )
+{
+       struct pfn_info *spfn_info;
+       unsigned long spfn;
+       l2_pgentry_t *spl2e, *gpl2e;
+       int i;
+
+       MEM_VVLOG("shadow_l2_table( %08lx )",gpfn);
+
+       perfc_incrc(shadow_l2_table_count);
+
+    // XXX in future, worry about racing in SMP guests 
+    //      -- use cmpxchg with PSH_pending flag to show progress (and spin)
+
+       spfn_info = alloc_domain_page( NULL ); // XXX account properly later 
+
+    ASSERT( spfn_info ); // XXX deal with failure later e.g. blow cache
+
+       spfn = (unsigned long) (spfn_info - frame_table);
+
+       // mark pfn as being shadowed, update field to point at shadow
+       frame_table[gpfn].shadow_and_flags = spfn | PSH_shadowed;
+
+       // mark shadow pfn as being a shadow, update field to point at  pfn     
+       frame_table[spfn].shadow_and_flags = gpfn | PSH_shadow;
+       
+       // we need to do this before the linear map is set up
+       spl2e = (l2_pgentry_t *) map_domain_mem(spfn << PAGE_SHIFT);
+
+       // get hypervisor and 2x linear PT mappings installed
+       memcpy(&spl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE], 
+           &idle_pg_table[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
+           HYPERVISOR_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));
+    spl2e[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
+        mk_l2_pgentry((gpfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);
+    spl2e[SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
+        mk_l2_pgentry((spfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);
+    spl2e[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] =
+        mk_l2_pgentry(__pa(frame_table[gpfn].u.domain->mm.perdomain_pt) | 
+                      __PAGE_HYPERVISOR);
+
+       // can't use the linear map as we may not be in the right PT
+       gpl2e = (l2_pgentry_t *) map_domain_mem(gpfn << PAGE_SHIFT);
+
+       // proactively create entries for pages that are already shadowed
+       for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
+       {
+               unsigned long spte = 0;
+
+#if 0  // Turns out this doesn't really help
+        unsigned long gpte;
+
+        gpte = l2_pgentry_val(gpl2e[i]);
+
+               if (gpte & _PAGE_PRESENT)
+               {
+                       unsigned long s_sh = 
+                               frame_table[ gpte>>PAGE_SHIFT ].shadow_and_flags;
+
+                       if( s_sh & PSH_shadowed ) // PSH_shadowed
+                       {
+                               if ( unlikely( (frame_table[gpte>>PAGE_SHIFT].type_and_flags & PGT_type_mask) == PGT_l2_page_table) )
+                {
+                                       printk("Linear mapping detected\n");
+                                   spte = gpte & ~_PAGE_RW;
+                }
+                               else
+                {
+                                   spte = ( gpte & ~PAGE_MASK ) | (s_sh<<PAGE_SHIFT) |
+                                               _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED ;
+                               }
+                               // XXX should probably update guest to ACCESSED|DIRTY too...
+
+                   }
+
+               }
+#endif
+
+               spl2e[i] = mk_l2_pgentry( spte );
+
+       }
+
+    // it's arguable we should 'preemptively shadow' a few active L1 pages
+    // to avoid taking a string of faults when 'jacking' a running domain
+
+    unmap_domain_mem( gpl2e );
+    unmap_domain_mem( spl2e );
+
+       MEM_VLOG("shadow_l2_table( %08lx -> %08lx)",gpfn,spfn);
+
+
+       return spfn;
+}
+
+
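+/*
+ * Try to fix up a fault at va entirely within the shadow tables.  Returns
+ * 1 if the faulting instruction should simply be retried, 0 if the fault
+ * must be propagated to the guest.  On the slow path (the direct write to
+ * shadow_linear_pg_table faults) the shadow L1 is first created, or hooked
+ * into the shadow L2 if it already exists.
+ */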
+int shadow_fault( unsigned long va, long error_code )
+{
+       unsigned long gpte, spte;
+
+       MEM_VVLOG("shadow_fault( va=%08lx, code=%ld )", va, error_code );
+
+       if ( unlikely(__get_user(gpte, (unsigned long*)&linear_pg_table[va>>PAGE_SHIFT])) )
+       {
+               MEM_VVLOG("shadow_fault - EXIT: read gpte faulted" );
+               return 0;  // propagate to guest
+       }
+
+       if ( ! (gpte & _PAGE_PRESENT) )
+       {
+               MEM_VVLOG("shadow_fault - EXIT: gpte not present (%lx)",gpte );
+               return 0;  // we're not going to be able to help
+    }
+
+    spte = gpte;
+
+       if ( error_code & 2  )  
+       {  // write fault
+               if ( gpte & _PAGE_RW )
+           {
+                       gpte |= _PAGE_DIRTY | _PAGE_ACCESSED;
+                       spte |= _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED; 
+            // (we're about to dirty it anyhow...)
+               }
+               else
+               {   // write fault on RO page
+            MEM_VVLOG("shadow_fault - EXIT: write fault on RO page (%lx)",gpte );
+                       return 0; // propagate to guest
+                       // not clear whether we should set accessed bit here...
+               }
+       }
+       else
+       {
+               gpte |= _PAGE_ACCESSED;
+        spte |= _PAGE_ACCESSED; // about to happen anyway
+               if ( ! (gpte & _PAGE_DIRTY) ) 
+                       spte &= ~_PAGE_RW;  // force clear unless already dirty
+       }
+
+       MEM_VVLOG("plan: gpte=%08lx  spte=%08lx", gpte, spte );
+
+       // write back updated gpte
+    // XXX watch out for read-only L2 entries! (not used in Linux)
+       if ( unlikely( __put_user( gpte, (unsigned long*)&linear_pg_table[va>>PAGE_SHIFT])) )
+               BUG();  // fixme!
+
+    if ( unlikely( __put_user( spte, (unsigned long*)&shadow_linear_pg_table[va>>PAGE_SHIFT])) )
+       { 
+               // failed:
+        //  the L1 may not be shadowed, or the L2 entry may be insufficient
+
+               unsigned long gpde, spde, gl1pfn, sl1pfn;
+
+        MEM_VVLOG("3: not shadowed or l2 insufficient gpte=%08lx  spte=%08lx",gpte,spte );
+        gpde = l2_pgentry_val(linear_l2_table[va>>L2_PAGETABLE_SHIFT]);
+
+        gl1pfn = gpde>>PAGE_SHIFT;
+
+        if ( ! (frame_table[gl1pfn].shadow_and_flags & PSH_shadowed ) )
+        {
+            // this L1 is NOT already shadowed so we need to shadow it
+            struct pfn_info *sl1pfn_info;
+            unsigned long *gpl1e, *spl1e;
+            int i;
+            sl1pfn_info = alloc_domain_page( NULL ); // XXX account properly! 
+            sl1pfn = sl1pfn_info - frame_table;
+
+            MEM_VVLOG("4a: l1 not shadowed ( %08lx )",sl1pfn);
+               perfc_incrc(shadow_l1_table_count);
+
+            sl1pfn_info->shadow_and_flags = PSH_shadow | gl1pfn;
+            frame_table[gl1pfn].shadow_and_flags = PSH_shadowed | sl1pfn;
+
+            gpde = gpde | _PAGE_ACCESSED | _PAGE_DIRTY;
+            spde = (gpde & ~PAGE_MASK) | _PAGE_RW | (sl1pfn<<PAGE_SHIFT);
+        
+
+            linear_l2_table[va>>L2_PAGETABLE_SHIFT] = mk_l2_pgentry(gpde);
+            shadow_linear_l2_table[va>>L2_PAGETABLE_SHIFT] =  mk_l2_pgentry(spde);
+
+            gpl1e = (unsigned long *) &(linear_pg_table[
+                         (va>>PAGE_SHIFT) & ~(ENTRIES_PER_L1_PAGETABLE-1) ]);
+
+            spl1e = (unsigned long *) &shadow_linear_pg_table[
+                         (va>>PAGE_SHIFT) & ~(ENTRIES_PER_L1_PAGETABLE-1) ];
+
+
+            // XXX can only do this if the shadow/guest is writeable
+            // disable write protection if !(gpde & _PAGE_RW) ????
+
+            for ( i = 0; i < ENTRIES_PER_L1_PAGETABLE; i++ )
+               {
+#if SHADOW_OPTIMISE
+                if ( (gpl1e[i] & (_PAGE_PRESENT|_PAGE_ACCESSED) ) == 
+                                (_PAGE_PRESENT|_PAGE_ACCESSED) )
+                {
+                    spl1e[i] = gpl1e[i];
+                    if ( !(gpl1e[i] & _PAGE_DIRTY) )
+                        spl1e[i] &= ~_PAGE_RW;
+                }
+                else
+#endif
+                    spl1e[i] = 0;
+            }
+
+
+        }
+        else
+        {
+            // this L1 was shadowed (by another PT) but we didn't have an L2
+            // entry for it
+
+            sl1pfn = frame_table[gl1pfn].shadow_and_flags & PSH_pfn_mask;
+
+            MEM_VVLOG("4b: was shadowed, l2 missing ( %08lx )",sl1pfn);
+
+            spde = (gpde & ~PAGE_MASK) | (sl1pfn<<PAGE_SHIFT) |
+                   _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY;
+
+            gpde = gpde | _PAGE_ACCESSED | _PAGE_DIRTY;
+
+
+            if ( unlikely( (sl1pfn<<PAGE_SHIFT) == (gl1pfn<<PAGE_SHIFT) ) )
+            {   // detect linear map, and keep pointing at guest
+                MEM_VLOG("4c: linear mapping ( %08lx )",sl1pfn);
+                spde = (spde & ~PAGE_MASK) | (gl1pfn<<PAGE_SHIFT);
+            }
+
+            linear_l2_table[va>>L2_PAGETABLE_SHIFT] = mk_l2_pgentry(gpde);
+            shadow_linear_l2_table[va>>L2_PAGETABLE_SHIFT] = mk_l2_pgentry(spde);
+                       
+
+        }              
+
+    shadow_linear_pg_table[va>>PAGE_SHIFT] = mk_l1_pgentry(spte);
+    // (we need to do the above even if we've just made the shadow L1)
+
+    } // end of the case where writing the shadow L1 directly failed
+       
+    perfc_incrc(shadow_fixup_count);
+
+    return 1; // let's try the faulting instruction again...
+
+}
+
+
+void shadow_l1_normal_pt_update( unsigned long pa, unsigned long gpte,
+                                 unsigned long *prev_spfn_ptr,
+                                l1_pgentry_t **prev_spl1e_ptr )
+{
+    unsigned long gpfn, spfn, spte, prev_spfn = *prev_spfn_ptr;    
+    l1_pgentry_t * spl1e, * prev_spl1e = *prev_spl1e_ptr;
+
+
+MEM_VVLOG("shadow_l1_normal_pt_update pa=%08lx, gpte=%08lx, prev_spfn=%08lx, prev_spl1e=%08lx\n",
+pa,gpte,prev_spfn, prev_spl1e);
+
+    // to get here, we know the l1 page *must* be shadowed
+
+    gpfn = pa >> PAGE_SHIFT;
+    spfn = frame_table[gpfn].shadow_and_flags & PSH_pfn_mask;
+
+    if ( spfn == prev_spfn )
+    {
+        spl1e = prev_spl1e;
+    }
+    else
+    {
+        if ( prev_spl1e )
+            unmap_domain_mem( prev_spl1e );
+        spl1e = (l1_pgentry_t *) map_domain_mem( spfn << PAGE_SHIFT );
+        *prev_spfn_ptr  = spfn;
+        *prev_spl1e_ptr = spl1e;
+    }
+       // XXX we assume only pagetables can be shadowed; this will have to change
+       // to allow arbitrary CoW etc.
+
+    spte = 0;
+
+#if SHADOW_OPTIMISE
+    if ( (gpte & (_PAGE_PRESENT|_PAGE_ACCESSED)) == 
+         (_PAGE_PRESENT|_PAGE_ACCESSED) )
+    {
+        spte = gpte;
+        if ( !(gpte & _PAGE_DIRTY) )
+            spte &= ~_PAGE_RW;  /* clear RW in the *shadow* pte, as in
+                                   shadow_fault; clearing gpte here had
+                                   no effect */
+    }
+#endif
+
+    spl1e[(pa & ~PAGE_MASK) / sizeof(l1_pgentry_t)] = mk_l1_pgentry( spte );
+
+    /* NB: leave spl1e mapped -- it is cached via *prev_spl1e_ptr and is
+       unmapped by the caller once the batch of updates is complete. */
+}
+
+void shadow_l2_normal_pt_update( unsigned long pa, unsigned long gpte )
+{
+    unsigned long gpfn, spfn, spte;
+    l2_pgentry_t * sp2le;
+    unsigned long s_sh;
+
+    MEM_VVLOG("shadow_l2_normal_pt_update pa=%08lx, gpte=%08lx",pa,gpte);
+
+    // to get here, we know the l2 page has a shadow
+
+    gpfn = pa >> PAGE_SHIFT;
+    spfn = frame_table[gpfn].shadow_and_flags & PSH_pfn_mask;
+
+    sp2le = (l2_pgentry_t *) map_domain_mem( spfn << PAGE_SHIFT );
+    // no real need for a cache here
+
+    spte = 0;
+
+    s_sh = frame_table[gpte >> PAGE_SHIFT].shadow_and_flags;
+               
+    if ( s_sh ) /* PSH_shadowed */
+    {
+        if ( unlikely( (frame_table[gpte>>PAGE_SHIFT].type_and_flags &
+                        PGT_type_mask) == PGT_l2_page_table) )
+        {
+            /* linear page table case */
+            spte = (gpte & ~_PAGE_RW) | _PAGE_DIRTY | _PAGE_ACCESSED;
+        }
+        else
+            spte = (gpte & ~PAGE_MASK) | (s_sh<<PAGE_SHIFT) |
+                   _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED;
+    }
+
+    /* XXX should mark the guest pte as DIRTY and ACCESSED too! */
+
+    sp2le[(pa & ~PAGE_MASK) / sizeof(l2_pgentry_t)] = mk_l2_pgentry( spte );
+
+    unmap_domain_mem( (void *) sp2le );
+}
+
+
+#if SHADOW_DEBUG
+
+static int sh_l2_present;
+static int sh_l1_present;
+char * sh_check_name;
+
+#define FAIL(_f, _a...)                             \
+{printk("XXX %s-FAIL (%d,%d)" _f " g=%08lx s=%08lx\n",  sh_check_name, level, i, ## _a , gpte, spte ); BUG();}
+
+int check_pte( unsigned long gpte, unsigned long spte, int level, int i )
+{
+       unsigned long mask, gpfn, spfn;
+
+    if ( spte == 0 || spte == 0xdeadface || spte == 0x00000E00)
+        return 1;  // always safe
+
+    if ( !(spte & _PAGE_PRESENT) )
+        FAIL("Non zero not present spte");
+
+       if( level == 2 ) sh_l2_present++;
+       if( level == 1 ) sh_l1_present++;
+
+    if ( !(gpte & _PAGE_PRESENT) )
+        FAIL("Guest not present yet shadow is");
+
+    mask = ~(_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW|0xFFFFF000);
+
+    if ( (spte & mask) != (gpte & mask ) )
+               FAIL("Corrupt?");
+
+       if ( (spte & _PAGE_DIRTY ) && !(gpte & _PAGE_DIRTY) )
+               FAIL("Dirty coherence");
+
+       if ( (spte & _PAGE_ACCESSED ) && !(gpte & _PAGE_ACCESSED) )
+               FAIL("Accessed coherence");
+
+       if ( (spte & _PAGE_RW ) && !(gpte & _PAGE_RW) )
+               FAIL("RW coherence");
+
+       if ( (spte & _PAGE_RW ) && !((gpte & _PAGE_RW) && (gpte & _PAGE_DIRTY) ))
+               FAIL("RW2 coherence");
+       
+       spfn = spte>>PAGE_SHIFT;
+       gpfn = gpte>>PAGE_SHIFT;
+
+       if ( gpfn == spfn )
+    {
+               if ( level > 1 )
+                       FAIL("Linear map ???");                  // XXX this will fail on BSD
+
+#if 0 // might be a RO mapping of a page table page
+               if ( frame_table[gpfn].shadow_and_flags != 0 )
+        {
+                       FAIL("Should have been shadowed g.sf=%08lx s.sf=%08lx", 
+                                frame_table[gpfn].shadow_and_flags,
+                                frame_table[spfn].shadow_and_flags);
+        }
+               else
+#endif
+                       return 1;
+       }
+       else
+       {
+               if ( level < 2 )
+                       FAIL("Shadow in L1 entry?");
+
+               if ( frame_table[gpfn].shadow_and_flags != (PSH_shadowed | spfn) )
+                       FAIL("spfn problem g.sf=%08lx s.sf=%08lx [g.sf]=%08lx [s.sf]=%08lx", 
+                                frame_table[gpfn].shadow_and_flags,
+                                frame_table[spfn].shadow_and_flags,
+                                frame_table[frame_table[gpfn].shadow_and_flags&PSH_pfn_mask].shadow_and_flags,
+                                frame_table[frame_table[spfn].shadow_and_flags&PSH_pfn_mask].shadow_and_flags
+                                );
+
+               if ( frame_table[spfn].shadow_and_flags != (PSH_shadow | gpfn) )
+                       FAIL("gpfn problem g.sf=%08lx s.sf=%08lx", 
+                                frame_table[gpfn].shadow_and_flags,
+                                frame_table[spfn].shadow_and_flags);
+
+       }
+
+       return 1;
+}
+
+
+int check_l1_table( unsigned long va, unsigned long g2, unsigned long s2 )
+{
+       int j;
+       unsigned long *gpl1e, *spl1e;
+
+       gpl1e = (unsigned long *) &(linear_pg_table[ va>>PAGE_SHIFT]);
+       spl1e = (unsigned long *) &(shadow_linear_pg_table[ va>>PAGE_SHIFT]);
+
+
+       for ( j = 0; j < ENTRIES_PER_L1_PAGETABLE; j++ )
+       {
+               unsigned long gpte = gpl1e[j];
+               unsigned long spte = spl1e[j];
+               
+               check_pte( gpte, spte, 1, j );
+       }
+
+       return 1;
+}
+
+#define FAILPT(_f, _a...)                             \
+{printk("XXX FAILPT" _f "\n", ## _a ); BUG();}
+
+int check_pagetable( pagetable_t pt, char *s )
+{
+       unsigned long gptbase = pagetable_val(pt);
+       unsigned long gpfn, spfn;
+       int i;
+       l2_pgentry_t *gpl2e, *spl2e;
+
+    return 1; /* XXX audit disabled for now */
+
+       sh_check_name = s;
+
+    MEM_VVLOG("%s-PT Audit",s);
+
+       sh_l2_present = sh_l1_present = 0;
+
+       gpfn =  gptbase >> PAGE_SHIFT;
+
+       if ( ! (frame_table[gpfn].shadow_and_flags & PSH_shadowed) )
+       {
+               printk("%s-PT %08lx not shadowed\n", s, gptbase);
+
+               if( frame_table[gpfn].shadow_and_flags != 0 ) BUG();
+
+               return 0;
+       }
+       
+    spfn = frame_table[gpfn].shadow_and_flags & PSH_pfn_mask;
+
+       if ( frame_table[gpfn].shadow_and_flags != (PSH_shadowed | spfn) )
+               FAILPT("ptbase shadow inconsistent1");
+
+       if ( frame_table[spfn].shadow_and_flags != (PSH_shadow | gpfn) )
+               FAILPT("ptbase shadow inconsistent2");
+
+
+       // use the linear map to get a pointer to the L2
+       gpl2e = (l2_pgentry_t *) &(linear_l2_table[0]);
+       spl2e = (l2_pgentry_t *) &(shadow_linear_l2_table[0]);
+
+       // check the whole L2
+       for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
+       {
+               unsigned long gpte = l2_pgentry_val(gpl2e[i]);
+               unsigned long spte = l2_pgentry_val(spl2e[i]);
+
+               check_pte( gpte, spte, 2, i );
+       }
+
+
+       // go back and recurse
+       for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
+       {
+               unsigned long gpte = l2_pgentry_val(gpl2e[i]);
+               unsigned long spte = l2_pgentry_val(spl2e[i]);
+
+               if ( spte )        
+                       check_l1_table( 
+                               i<<L2_PAGETABLE_SHIFT,
+                               gpte>>PAGE_SHIFT, spte>>PAGE_SHIFT );
+
+       }
+
+
+       MEM_VVLOG("PT verified : l2_present = %d, l1_present = %d\n",
+                  sh_l2_present, sh_l1_present );
+       
+       return 1;
+}
+
+
+#endif
+
+
+#endif // CONFIG_SHADOW
+
+
+
index 3dd29864922c0e78b8cb697a8af80f59b0892434..0496f481d9e15ed32f43d2d346a17da629075b35 100644 (file)
--- a/xen/include/asm-i386/config.h
+++ b/xen/include/asm-i386/config.h
@@ -40,6 +40,9 @@
 
 #define CONFIG_XEN_ATTENTION_KEY 1
 
+#define CONFIG_SHADOW 1
+
+
 #define HZ 100
 
 /*
@@ -68,7 +71,7 @@
  */
 #define MAX_MONITOR_ADDRESS   (16*1024*1024)
 #define MAX_DMA_ADDRESS       (16*1024*1024)
-#define MAX_DIRECTMAP_ADDRESS (44*1024*1024)
+#define MAX_DIRECTMAP_ADDRESS (40*1024*1024) // XXX was 44
 /* And the virtual addresses for the direct-map region... */
 #define DIRECTMAP_VIRT_START  (READONLY_MPT_VIRT_END)
 #define DIRECTMAP_VIRT_END    (DIRECTMAP_VIRT_START + MAX_DIRECTMAP_ADDRESS)
 /* Next 4MB of virtual address space is used as a linear p.t. mapping. */
 #define LINEAR_PT_VIRT_START  (DIRECTMAP_VIRT_END)
 #define LINEAR_PT_VIRT_END    (LINEAR_PT_VIRT_START + (4*1024*1024))
+/* Next 4MB of virtual address space is used as a shadow linear p.t. map. */
+#define SH_LINEAR_PT_VIRT_START  (LINEAR_PT_VIRT_END)
+#define SH_LINEAR_PT_VIRT_END    (SH_LINEAR_PT_VIRT_START + (4*1024*1024))
 /* Next 4MB of virtual address space used for per-domain mappings (eg. GDT). */
-#define PERDOMAIN_VIRT_START  (LINEAR_PT_VIRT_END)
+#define PERDOMAIN_VIRT_START  (SH_LINEAR_PT_VIRT_END)
 #define PERDOMAIN_VIRT_END    (PERDOMAIN_VIRT_START + (4*1024*1024))
 #define GDT_VIRT_START        (PERDOMAIN_VIRT_START)
 #define GDT_VIRT_END          (GDT_VIRT_START + (64*1024))
index a4339d64dd6486b83a074ca97ff93bc58f80ccb0..64b5cf73a07095208692ef42d702cfe16383777f 100644 (file)
--- a/xen/include/asm-i386/page.h
+++ b/xen/include/asm-i386/page.h
@@ -91,6 +91,7 @@ typedef struct { unsigned long pt_lo; } pagetable_t;
 #include <asm/flushtlb.h>
 
 #define linear_pg_table ((l1_pgentry_t *)LINEAR_PT_VIRT_START)
+#define linear_l2_table ((l2_pgentry_t *)(LINEAR_PT_VIRT_START+(LINEAR_PT_VIRT_START>>(L2_PAGETABLE_SHIFT-L1_PAGETABLE_SHIFT))))
 
 extern l2_pgentry_t idle_pg_table[ENTRIES_PER_L2_PAGETABLE];
 extern void paging_init(void);
index c7df85aa2837054ec1b90afa5e627a4c52137394..9766ac7b209be81d21fcd80860f4559dca801e71 100644 (file)
--- a/xen/include/asm-i386/processor.h
+++ b/xen/include/asm-i386/processor.h
@@ -415,6 +415,12 @@ struct mm_struct {
      */
     l1_pgentry_t *perdomain_pt;
     pagetable_t  pagetable;
+
+#ifdef CONFIG_SHADOW
+    unsigned int shadowmode;  /* flags to control shadow table operation */
+    pagetable_t  shadowtable;
+#endif
+
     /* Current LDT details. */
     unsigned long ldt_base, ldt_ents, shadow_ldt_mapcnt;
     /* Next entry is passed to LGDT on domain switch. */
index 0774571a73ebe90f9711052bccc0f5dd9b7da5a6..c1df341a283c21916730b116f069d1d0dade4971 100644 (file)
--- a/xen/include/xeno/mm.h
+++ b/xen/include/xeno/mm.h
@@ -67,6 +67,10 @@ struct pfn_info
     unsigned long       type_and_flags;
     /* Timestamp from 'TLB clock', used to reduce need for safety flushes. */
     unsigned long       tlbflush_timestamp;
+#ifdef CONFIG_SHADOW
+    /* Shadow page status: top bits flags, bottom bits are a pfn */
+    unsigned long       shadow_and_flags;  
+#endif
 };
 
  /* The following page types are MUTUALLY EXCLUSIVE. */
@@ -100,6 +104,7 @@ struct pfn_info
  /* 28-bit count of references to this frame. */
 #define PGC_count_mask                ((1<<28)-1)
 
+
 /* We trust the slab allocator in slab.c, and our use of it. */
 #define PageSlab(page)         (1)
 #define PageSetSlab(page)      ((void)0)
index f81b5bcba19d770e82189674521e0a1866e29086..0475b6371ec8a6f66e8db68617a54c1b97e3ca20 100644 (file)
--- a/xen/include/xeno/perfc_defn.h
+++ b/xen/include/xeno/perfc_defn.h
@@ -19,6 +19,15 @@ PERFCOUNTER_CPU( need_flush_tlb_flush, "PG_need_flush tlb flushes" )
 
 PERFCOUNTER_CPU( calls_to_mmu_update, "calls_to_mmu_update" )
 PERFCOUNTER_CPU( num_page_updates, "num_page_updates" )
-
+PERFCOUNTER_CPU( calls_to_update_va, "calls_to_update_va_map" )
+PERFCOUNTER_CPU( page_faults, "page faults" )
+PERFCOUNTER_CPU( copy_user_faults, "copy_user faults" )
+PERFCOUNTER_CPU( map_domain_mem_count, "map_domain_mem count" )
+
+PERFCOUNTER_CPU( shadow_l2_table_count, "shadow_l2_table count" )
+PERFCOUNTER_CPU( shadow_l1_table_count, "shadow_l1_table count" )
+PERFCOUNTER_CPU( unshadow_table_count, "unshadow_table count" )
+PERFCOUNTER_CPU( shadow_fixup_count, "shadow_fixup count" )
+PERFCOUNTER_CPU( shadow_update_va_fail, "shadow_update_va_fail" )
 
 
diff --git a/xen/include/xeno/shadow.h b/xen/include/xeno/shadow.h
new file mode 100644 (file)
index 0000000..dca0126
--- /dev/null
+++ b/xen/include/xeno/shadow.h
@@ -0,0 +1,48 @@
+/* -*-  Mode:C; c-basic-offset:4; tab-width:4 -*- */
+
+#ifndef _XENO_SHADOW_H
+#define _XENO_SHADOW_H
+
+#ifdef CONFIG_SHADOW
+
+#include <xeno/config.h>
+#include <xeno/types.h>
+#include <xeno/mm.h>
+
+/* Shadow PT flag bits in pfn_info */
+#define PSH_shadowed   (1<<31) /* page has a shadow. PFN points to shadow */
+#define PSH_shadow     (1<<30) /* page is a shadow. PFN points to orig page */
+#define PSH_pending    (1<<29) /* page is in the process of being shadowed */
+#define PSH_pfn_mask   ((1<<21)-1)
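+
+/* A shadowed guest PT page thus has shadow_and_flags == (PSH_shadowed|spfn),
+   and its shadow page has shadow_and_flags == (PSH_shadow|gpfn). */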
+
+/* Shadow PT operation mode : shadowmode variable in mm_struct */
+#define SHM_test        (1<<0) /* just run domain on shadow PTs */
+#define SHM_logdirty    (1<<1) /* log pages that are dirtied */
+#define SHM_cow         (1<<2) /* copy on write all dirtied pages */
+#define SHM_translate   (1<<3) /* lookup machine pages in translation table */
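+
+/* Only SHM_test is wired up by this change; the other modes are
+   placeholders. */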
+
+#define shadow_linear_pg_table ((l1_pgentry_t *)SH_LINEAR_PT_VIRT_START)
+#define shadow_linear_l2_table ((l2_pgentry_t *)(SH_LINEAR_PT_VIRT_START+(SH_LINEAR_PT_VIRT_START>>(L2_PAGETABLE_SHIFT-L1_PAGETABLE_SHIFT))))
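+
+/* The shadow L2 contains two recursive slots: LINEAR_PT_VIRT_START maps
+   the guest's own tables and SH_LINEAR_PT_VIRT_START maps the shadow
+   tables, so both stay addressable while the shadow is loaded in CR3
+   (see shadow_l2_table). */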
+
+extern pagetable_t shadow_mk_pagetable( unsigned long gptbase, unsigned int shadowmode );
+extern void unshadow_table( unsigned long gpfn );
+extern unsigned long shadow_l2_table( unsigned long gpfn );
+extern int shadow_fault( unsigned long va, long error_code );
+extern void shadow_l1_normal_pt_update( unsigned long pa, unsigned long gpte, 
+                                        unsigned long *prev_spfn_ptr,
+                                        l1_pgentry_t **prev_spl1e_ptr );
+extern void shadow_l2_normal_pt_update( unsigned long pa, unsigned long gpte );
+
+
+#define SHADOW_DEBUG 0
+#define SHADOW_OPTIMISE 1
+
+#if SHADOW_DEBUG
+extern int check_pagetable( pagetable_t pt, char *s );
+#else
+#define check_pagetable( pt, s )
+#endif
+
+
+#endif /* CONFIG_SHADOW */
+#endif /* _XENO_SHADOW_H */
index fbd9be63c96f4ffb3ede9e64885839c4b7591790..bb25e6a2b9b4e84098edc6b426ce40897a06bf87 100644 (file)
--- a/xen/net/dev.c
+++ b/xen/net/dev.c
@@ -28,6 +28,7 @@
 #include <xeno/init.h>
 #include <xeno/module.h>
 #include <xeno/event.h>
+#include <xeno/shadow.h>
 #include <asm/domain_page.h>
 #include <asm/pgalloc.h>
 #include <asm/io.h>
@@ -488,7 +489,7 @@ struct netif_rx_stats netdev_rx_stat[NR_CPUS];
 void deliver_packet(struct sk_buff *skb, net_vif_t *vif)
 {
     rx_shadow_entry_t *rx;
-    unsigned long *ptep, pte; 
+    unsigned long *ptep, pte, new_pte;
     struct pfn_info *old_page, *new_page, *pte_page;
     unsigned short size;
     unsigned char  offset, status = RING_STATUS_OK;
@@ -530,10 +531,12 @@ void deliver_packet(struct sk_buff *skb, net_vif_t *vif)
     wmb(); /* Get type count and set flush bit before updating PTE. */
 
     pte = *ptep;
+
+    new_pte = (pte & ~PAGE_MASK) | _PAGE_RW | _PAGE_PRESENT |
+              ((new_page - frame_table) << PAGE_SHIFT);
+
     if ( unlikely(pte & _PAGE_PRESENT) || 
-         unlikely(cmpxchg(ptep, pte, 
-                          (pte & ~PAGE_MASK) | _PAGE_RW | _PAGE_PRESENT |
-                          ((new_page - frame_table) << PAGE_SHIFT))) != pte )
+         unlikely(cmpxchg(ptep, pte, new_pte)) != pte )
     {
         DPRINTK("PTE was modified or reused! %08lx %08lx\n", pte, *ptep);
         unmap_domain_mem(ptep);
@@ -543,6 +546,22 @@ void deliver_packet(struct sk_buff *skb, net_vif_t *vif)
         goto out;
     }
 
+
+#ifdef CONFIG_SHADOW
+    if ( pte_page->shadow_and_flags & PSH_shadowed )
+    {
+        unsigned long spte_pfn = pte_page->shadow_and_flags & PSH_pfn_mask;
+        unsigned long *sptr = map_domain_mem( (spte_pfn<<PAGE_SHIFT) |
+                                              (((unsigned long)ptep)&~PAGE_MASK) );
+
+        /* Write the spte directly: saves taking a shadow fault on it later. */
+        *sptr = new_pte;
+
+        unmap_domain_mem( sptr );
+    }
+#endif
+
     machine_to_phys_mapping[new_page - frame_table] 
         = machine_to_phys_mapping[old_page - frame_table];
     
@@ -2068,6 +2087,8 @@ static void get_rx_bufs(net_vif_t *vif)
 
         pte_pfn  = rx.addr >> PAGE_SHIFT;
         pte_page = &frame_table[pte_pfn];
+
+       //printk("MMM %08lx ", rx.addr);
             
         /* The address passed down must be to a valid PTE. */
         if ( unlikely(pte_pfn >= max_page) ||
@@ -2081,7 +2102,7 @@ static void get_rx_bufs(net_vif_t *vif)
         
         ptep = map_domain_mem(rx.addr);
         pte  = *ptep;
-        
+       //printk("%08lx\n",pte);        
         /* We must be passed a valid writeable mapping to swizzle. */
         if ( unlikely((pte & (_PAGE_PRESENT|_PAGE_RW)) != 
                       (_PAGE_PRESENT|_PAGE_RW)) ||
@@ -2092,6 +2113,22 @@ static void get_rx_bufs(net_vif_t *vif)
             make_rx_response(vif, rx.id, 0, RING_STATUS_BAD_PAGE, 0);
             goto rx_unmap_and_continue;
         }
+
+#ifdef CONFIG_SHADOW
+        if ( frame_table[rx.addr>>PAGE_SHIFT].shadow_and_flags & PSH_shadowed )
+        {
+            unsigned long spfn = 
+                frame_table[rx.addr>>PAGE_SHIFT].shadow_and_flags & PSH_pfn_mask;
+            unsigned long *sptr = 
+                map_domain_mem( (spfn<<PAGE_SHIFT) | (rx.addr&~PAGE_MASK) );
+
+            /* The guest pte is about to be replaced: zap its shadow copy. */
+            *sptr = 0;
+            unmap_domain_mem( sptr );
+        }
+#endif
         
         buf_pfn  = pte >> PAGE_SHIFT;
         buf_page = &frame_table[buf_pfn];
@@ -2112,6 +2149,8 @@ static void get_rx_bufs(net_vif_t *vif)
             put_page_and_type(pte_page);
             make_rx_response(vif, rx.id, 0, RING_STATUS_BAD_PAGE, 0);
             goto rx_unmap_and_continue;
+
+            /* XXX IAP: should CONFIG_SHADOW do something here? */
         }
 
         /*